Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
# Stacked barplot of per-sample relative abundances, filled by phylum and
# faceted by individual and sampling time point.
genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # drop zero counts
  # grouping by phylum keeps the same sorting of taxonomic units in every bar
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  # stacked bars with thin white borders between segments
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  # one facet per individual, nested by time point (shortened strip labels)
  facet_nested(
    . ~ individual + time_point,
    scales = "free",
    labeller = labeller(
      time_point = c(
        "Pre_antibiotics" = "Pre_ant",
        "Post_antibiotics_1" = "Post_ant_1",
        "Post_antibiotics_2" = "Post_ant_2"
      )
    )
  ) +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")
5.1.2 Phylum relative abundances
# Relative abundance of each phylum in each sample
# (one row per sample x phylum, summed over the genomes of that phylum).
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # "drop_last" is summarise()'s default regrouping, spelled out so the result
  # stays grouped by sample without the informational message
  summarise(relabun = sum(count), .groups = "drop_last")
# Mean and standard deviation of each phylum's relative abundance across
# samples, sorted from highest to lowest mean and rendered as a table.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| phylum | mean | sd |
|---|---|---|
| p__Bacteroidota | 0.524440141 | 0.127316510 |
| p__Bacillota_A | 0.218129853 | 0.075749924 |
| p__Pseudomonadota | 0.100829714 | 0.168802352 |
| p__Bacillota | 0.065661524 | 0.051944414 |
| p__Verrucomicrobiota | 0.047313902 | 0.048648719 |
| p__Cyanobacteriota | 0.015633063 | 0.021233104 |
| p__Desulfobacterota | 0.013819770 | 0.006889224 |
| p__Bacillota_C | 0.012453388 | 0.013842018 |
| p__Bacillota_B | 0.001718646 | 0.002287117 |
# Phylum names ranked by their mean relative abundance, highest first.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(avg = mean(relabun)) %>%
  arrange(desc(avg)) %>%
  pull(phylum)
# Jitter plot of per-sample relative abundances, one row per phylum.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  # factor levels follow the reversed abundance ranking of phylum_arrange
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  # colours picked by phylum name, in the same order as the factor levels
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  labs(y = "Phylum", x = "Relative abundance") +
  theme_minimal() +
  theme(legend.position = "none")
5.2 Taxonomy boxplot
5.2.1 Family
# Relative abundance of each family in each sample
# (one row per sample x family, summed over the genomes of that family).
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  # reduce to minimum number of columns
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # "drop_last" is summarise()'s default regrouping, spelled out so the result
  # stays grouped by sample without the informational message
  summarise(relabun = sum(count), .groups = "drop_last")
# Mean and standard deviation of each family's relative abundance across
# samples, sorted from highest to lowest mean and rendered as a table.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
| family | mean | sd |
|---|---|---|
| f__Bacteroidaceae | 0.2111761804 | 0.139716297 |
| f__Rikenellaceae | 0.1262345009 | 0.041183911 |
| f__Lachnospiraceae | 0.1221559156 | 0.056348298 |
| f__Tannerellaceae | 0.0597688994 | 0.038763633 |
| f__ | 0.0558781360 | 0.053517532 |
| f__Erysipelotrichaceae | 0.0503603704 | 0.049830980 |
| f__Marinifilaceae | 0.0499502203 | 0.026876894 |
| f__UBA932 | 0.0335510817 | 0.041786274 |
| f__Ruminococcaceae | 0.0326684282 | 0.026453449 |
| f__Akkermansiaceae | 0.0298155750 | 0.049482943 |
| f__Oscillospiraceae | 0.0271637611 | 0.018607679 |
| f__Burkholderiaceae_A | 0.0269794972 | 0.057963873 |
| f__Gastranaerophilaceae | 0.0156330633 | 0.021233104 |
| f__Burkholderiaceae_B | 0.0142393093 | 0.042253051 |
| f__Desulfovibrionaceae | 0.0138197700 | 0.006889224 |
| f__Alteromonadaceae | 0.0126841890 | 0.037279818 |
| f__Enterobacteriaceae | 0.0110005401 | 0.018192813 |
| f__Butyricicoccaceae | 0.0109420628 | 0.006933355 |
| f__CAG-239 | 0.0106659324 | 0.014861691 |
| f__CALVMC01 | 0.0098587671 | 0.017356533 |
| f__Moraxellaceae | 0.0087097650 | 0.019763859 |
| f__Burkholderiaceae | 0.0081400791 | 0.026997588 |
| f__JADKCL01 | 0.0076424144 | 0.025347021 |
| f__UBA660 | 0.0069343163 | 0.015713124 |
| f__Acutalibacteraceae | 0.0056333801 | 0.007365987 |
| f__Muribaculaceae | 0.0046063404 | 0.003466581 |
| f__Coprobacillaceae | 0.0044443621 | 0.004445464 |
| f__UBA3830 | 0.0042073343 | 0.010172731 |
| f__Mycoplasmoidaceae | 0.0039224751 | 0.006889425 |
| f__Flavobacteriaceae | 0.0036983513 | 0.012266044 |
| f__UBA3700 | 0.0036927654 | 0.012247517 |
| f__Aeromonadaceae | 0.0032090159 | 0.007940373 |
| f__CHK158-818 | 0.0025811994 | 0.002580585 |
| f__Anaerotignaceae | 0.0023321053 | 0.002969332 |
| f__Peptococcaceae | 0.0017186456 | 0.002287117 |
| f__Anaerovoracaceae | 0.0015862187 | 0.004037860 |
| f__UBA1829 | 0.0014009798 | 0.002725359 |
| f__Pseudomonadaceae | 0.0009940515 | 0.003108676 |
# Family names ranked by their mean relative abundance, highest first.
# The ranking statistic is mean(na.rm = TRUE), the same one used for the
# family summary table and (as mean) for phylum_arrange; the column was
# previously called `mean` but filled with sum().
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(mean = mean(relabun, na.rm = TRUE)) %>%
  arrange(desc(mean)) %>%
  pull(family)
# Per time_point: jitter plot of the 20 top-ranked families, coloured by phylum.
family_summary %>%
  # NOTE(review): a family label shared by several phyla (possibly the
  # unclassified "f__") gets one row per phylum from this join -- confirm
  left_join(
    genome_metadata %>% select(family, phylum) %>% unique(),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # head() instead of [1:20]: no NA entries when fewer than 20 families exist
  filter(family %in% head(family_arrange, 20)) %>%
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  {
    ggplot(., aes(x = relabun, y = family, group = family, color = phylum)) +
      # Keep only the colours of the phyla actually plotted, chosen by name
      # rather than by dropping a hard-coded position from phylum_colors.
      # Assumes phylum_colors is named by phylum, as its name-based indexing
      # in the phylum plot implies.
      scale_color_manual(
        values = phylum_colors[names(phylum_colors) %in% .$phylum]
      ) +
      geom_jitter(alpha = 0.5) +
      facet_grid(. ~ time_point) +
      theme_minimal() +
      labs(y = "Family", x = "Relative abundance", color = "Phylum")
  }
5.2.2 Genus
# Relative abundance of each genus in each sample, keeping the phylum column.
# Rows whose genus is the bare "g__" label are removed and the "g__" prefix is
# stripped from the remaining genus names.
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  # reduce to minimum number of columns
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # "drop_last" is summarise()'s default regrouping, spelled out so the result
  # stays grouped by sample and phylum without the informational message
  summarise(relabun = sum(count), .groups = "drop_last") %>%
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus))
# Mean and standard deviation of each genus' relative abundance across
# samples, sorted from highest to lowest mean.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean))
# Render the ranked genus summary as a table.
tt(genus_summary_sort)
| genus | mean | sd |
|---|---|---|
| Bacteroides | 0.1775249332 | 0.109314357 |
| Parabacteroides | 0.0554095726 | 0.035124121 |
| Mucinivorans | 0.0552600046 | 0.049550217 |
| Odoribacter | 0.0401415283 | 0.024399876 |
| Alistipes | 0.0397499792 | 0.028185247 |
| Clostridium_Q | 0.0363715276 | 0.020090245 |
| Egerieousia | 0.0335510817 | 0.041786274 |
| SZUA-378 | 0.0290183038 | 0.040693410 |
| JAEZVV01 | 0.0269794972 | 0.057963873 |
| Akkermansia | 0.0223731622 | 0.046989793 |
| Bacteroides_G | 0.0189263693 | 0.040746534 |
| Paucibacter_A | 0.0142393093 | 0.042253051 |
| Amedibacillus | 0.0141985824 | 0.029133436 |
| Dielma | 0.0141336576 | 0.016330606 |
| Intestinimonas | 0.0133394420 | 0.010705642 |
| Pararheinheimera | 0.0126841890 | 0.037279818 |
| Pseudoflavonifractor | 0.0125791301 | 0.011106296 |
| Citrobacter | 0.0110005401 | 0.018192813 |
| Hydrogenoanaerobacterium | 0.0094715505 | 0.010554697 |
| 14-2 | 0.0092323435 | 0.007386213 |
| Anaerotruncus | 0.0089714879 | 0.009209991 |
| Acinetobacter | 0.0087097650 | 0.019763859 |
| Avirikenella | 0.0084205153 | 0.010938908 |
| Pseudoduganella | 0.0081400791 | 0.026997588 |
| Bilophila | 0.0071557312 | 0.004629539 |
| Spyradomonas | 0.0070402620 | 0.008193530 |
| Hungatella_A | 0.0066989150 | 0.009041871 |
| Tidjanibacter | 0.0060839003 | 0.005550803 |
| CAJGBR01 | 0.0056953492 | 0.003603446 |
| JADFUS01 | 0.0056704093 | 0.005147440 |
| DUWA01 | 0.0056452122 | 0.015640734 |
| Mobilisporobacter | 0.0054593818 | 0.007616962 |
| JAIHAL01 | 0.0047986372 | 0.007841867 |
| HGM05232 | 0.0046063404 | 0.003466581 |
| Coprobacillus | 0.0044443621 | 0.004445464 |
| Parabacteroides_B | 0.0043593269 | 0.004775063 |
| CAZU01 | 0.0041733530 | 0.011570149 |
| Mycoplasma_L | 0.0039224751 | 0.006889425 |
| Flavobacterium | 0.0036983513 | 0.012266044 |
| UMGS1251 | 0.0033243891 | 0.006216785 |
| Negativibacillus | 0.0032342216 | 0.004905132 |
| Aeromonas | 0.0032090159 | 0.007940373 |
| Scatacola_A | 0.0030305489 | 0.007129984 |
| OM05-12 | 0.0028388258 | 0.003946216 |
| Gallibacteroides | 0.0025811994 | 0.002580585 |
| JAAYQI01 | 0.0023321053 | 0.002969332 |
| Hespellia | 0.0021290168 | 0.004757466 |
| Massiliimalia | 0.0019923222 | 0.004302461 |
| Intestinibacillus | 0.0018143641 | 0.003265593 |
| Emergencia | 0.0015862187 | 0.004037860 |
| UBA1829 | 0.0014009798 | 0.002725359 |
| MGBC107952 | 0.0012891041 | 0.004275474 |
| Evtepia | 0.0012451890 | 0.002490315 |
| Pseudomonas_E | 0.0009940515 | 0.003108676 |
# Genus names ranked by their mean relative abundance, highest first.
# genus_summary already excludes the bare "g__" label and has the "g__" prefix
# stripped, so those steps are not repeated here. The ranking statistic is
# mean(na.rm = TRUE), matching genus_summary_sort; the column was previously
# called `mean` but filled with sum().
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(mean = mean(relabun, na.rm = TRUE)) %>%
  arrange(desc(mean)) %>%
  pull(genus)
# Per time_point: jitter plot of per-sample relative abundances of every genus,
# coloured by phylum.
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # factor levels follow the reversed ranking of genus_summary_sort
  mutate(
    genus = factor(genus, levels = rev(pull(genus_summary_sort, genus)))
  ) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ time_point) +
  theme_minimal() +
  # the y axis shows genera (label was "Family", copied from the family plot)
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")